/************************************************
Project: Samuel E. Ross Econ Thesis

File: Data Processing

Before running the following file:
	
	-Designate SamuelERossEconThesis/ as the working directory
	
************************************************/

clear
set more off

* set FRED API key for pulling CPI data 
set fredkey "6ec0d73a2965d3f98139934dbb970b6e" 

* set working directory
// cd <insert path to SamuelERossEconThesis/>


*****INTEREST RATES*****

* import interest rate data
import delimited "Data/InputData/interest.csv", clear

* keep only country, time, and value columns
keep ref_area time_period obs_value

* indicator for US vs South Korea
g isUS = 1
replace isUS = 0 if ref_area!="USA"

* reformat time variable as month
g month = monthly(time_period, "YM")
format month %tm

* rename value column and keep only new country, month, and value columns
ren obs_value value
keep isUS month value

* reshape so that each observation is a month
reshape wide value, i(month) j(isUS)
ren value0 korea_interest
ren value1 us_interest

* generate log interest rates
g i = log(korea_interest)
g i_star = log(us_interest)

* keep only month and interest rate columns
keep month i i_star korea_interest us_interest

* save as intermediate
save "Data/IntermediateData/interest_diff.dta", replace

************************


*****MONTHLY EXCHANGE RATES*****

* import monthly exchange rates
import delimited "Data/InputData/fx_monthly.csv", clear

* format time as month
g month = monthly(time_period, "YM")
format month %tm

* rename value column
ren obs_value value

* set month as time variable to use lags
tsset month

* calculate ex post expected 3-month change in exchange rates
g Edelta = log(f3.value) - log(value)

* calculate 6-month lagged variance of Edelta
rangestat (variance) Edelta, interval(month -5 0)

* rename variables
ren Edelta_variance sigma_e
ren value fx

* keep month, expected exchange rate, variance, and exchange rate columns
keep month Edelta sigma_e fx

* save as intermediate
save "Data/IntermediateData/fx.dta", replace

********************************


*****SOUTH KOREAN MONETARY POLICY SHOCKS*****

* import Ahn et al. monetary policy shocks
import delimited "Data/InputData/shocks.csv", clear

* reformat date
ren date date_str
g date = date(date_str, "MD20Y")
format date %td

* associate shock with next month if later than the 25th
g merge_month = month(date)
replace merge_month = mod(merge_month+1, 12) if day(date) >=25
replace merge_month = 12 if merge_month==0
g year = year(date)
g month = ym(year, merge_month)
format month %tm

* set time variable as the month and fill missing months with missing values
tsset month
tsfill

* keep shock columns and month
keep surprise mp_shock cbi_shock month

* for months with missing shock data, take most recent value
carryforward surprise mp_shock cbi_shock, replace

* save as intermediate
save "Data/IntermediateData/shocks.dta", replace

*********************************************


*****EXCESS BOND PREMIUM*****

* import GZ excess bond premium
import delimited "Data/InputData/ebp.csv", clear

* reformat date as month
ren date date_str
g date = date(date_str,"YMD")
g month_dt = month(date)
g year_dt = year(date)
g month = ym(year_dt, month_dt)
format month %tm

* keep month and ebp columns
keep month ebp

* save as intermediate
save "Data/IntermediateData/ebp.dta", replace

*****************************

//
// import delimited "bonds.csv", clear
//
// ren recorddate date_str
// g date = date(date_str,"YMD")
// g month_dt = month(date)
// g year_dt = year(date)
// g month = ym(year_dt, month_dt)
// format month %tm
//
// keep if securitytypedescription == "Total Marketable"
// ren debtheldbythepublicinmillions debt
// keep month debt
//
// save "intermediate/bonds.dta", replace
//


*****SOUTH KOREAN BOND SUPPLY*****

import delimited "Data/InputData/debt_korea.csv", clear

* reformat value from string to number
ren debt_twon debt_twon_str
destring debt_twon_str, generate(debt_twon) ignore(",")

* reformat date as month
replace month = ym(year, month)
format month %tm

* drop year and string columns
drop year debt_twon_str

* save as intermediate
save "Data/IntermediateData/debt_korea.dta", replace

**********************************


*****US CONSUMER PRICES*****

* use FRED to import CPI data
import fred CPIAUCSL, clear

* reformat date as month
g month_dt = month(daten)
g year_dt = year(daten)
g month = ym(year_dt, month_dt)
format month %tm

* rename cpi and month columns and retain
ren CPIAUCSL cpi
keep month cpi

* set time variable as month for lags
tsset month

* calculate US inflation
g pi_star = log(cpi) - log(l.cpi)

* save as intermediate
save "Data/IntermediateData/cpi.dta", replace

****************************


*****ECONOMIC POLICY UNCERTAINTY INDEX*****

* import EPU index
import delimited "Data/InputData/epu.csv", clear

* rescale indices to average 1 over period (100 by default)
replace epu = epu/100
replace epu_monetary = epu_monetary/100
replace epu_fiscal = epu_fiscal/100
replace epu_trade = epu_trade/100
replace epu_forex = epu_forex/100

* drop pre-2000 observations
drop if _n < 121

* reformat date
ren date date_str
g date = date(date_str, "M20Y")
g month_dt = month(date)
g year_dt = year(date)
g month = ym(year_dt, month_dt)
format month %tm

* keep month and epu columns
keep month epu*

* save as intermediate
save "Data/IntermediateData/epu.dta", replace

*******************************************


*****MERGE AND CLEAN MONTHLY DATA*****

* start with MP shocks
use Data/IntermediateData/shocks.dta, clear

* merge preceding datasets
merge 1:1 month using "Data/IntermediateData/interest_diff.dta", nogen
merge 1:1 month using "Data/IntermediateData/fx.dta", nogen
merge 1:1 month using "Data/IntermediateData/ebp.dta", nogen
// merge 1:1 month using "Data/IntermediateData/bonds.dta", nogen
merge 1:1 month using "Data/IntermediateData/cpi.dta", nogen
merge 1:1 month using "Data/IntermediateData/debt_korea.dta", nogen
merge 1:1 month using "Data/IntermediateData/epu.dta", nogen

* calculate covariance of exchange rates 
rangestat (cov) Edelta pi_star, interval(month -5 0)
ren cov_x sigma_epi

* calculate UIP deviations and excess bond supply
g uip = i - i_star - Edelta
g noise = sigma_e * ebp
g bond_supply = - 1000000*sigma_e * (us_interest / korea_interest)*(1 / fx)*(debt_twon/cpi)

* for each Ahn et al. shock, calcuate the maximum magnitude shock over the past year
g abs = surprise
g max = max(abs, l.abs, l2.abs, l3.abs, l4.abs, l5.abs, l6.abs, l7.abs, l8.abs, l9.abs,l10.abs, l11.abs)
g min = min(abs, l.abs, l2.abs, l3.abs, l4.abs, l5.abs, l6.abs, l7.abs, l8.abs, l9.abs,l10.abs, l11.abs)
g surprise_max = max
replace surprise_max = min if abs(min) > abs(max)
replace surprise_max = . if month < `=ym(2011, 4)'
drop abs max min
g abs = mp_shock
g max = max(abs, l.abs, l2.abs, l3.abs, l4.abs, l5.abs, l6.abs, l7.abs, l8.abs, l9.abs,l10.abs, l11.abs)
g min = min(abs, l.abs, l2.abs, l3.abs, l4.abs, l5.abs, l6.abs, l7.abs, l8.abs, l9.abs,l10.abs, l11.abs)
g mp_max = max
replace mp_max = min if abs(min) > abs(max)
replace mp_max = . if month < `=ym(2011, 4)'
drop abs max min
g abs = cbi_shock
g max = max(abs, l.abs, l2.abs, l3.abs, l4.abs, l5.abs, l6.abs, l7.abs, l8.abs, l9.abs,l10.abs, l11.abs)
g min = min(abs, l.abs, l2.abs, l3.abs, l4.abs, l5.abs, l6.abs, l7.abs, l8.abs, l9.abs,l10.abs, l11.abs)
g cbi_max = max
replace cbi_max = min if abs(min) > abs(max)
replace cbi_max = . if month < `=ym(2011, 4)'
drop abs max min

* save as analysis
save "Data/AnalysisData/analysis_monthly.dta", replace

**************************************


*****QUARTERLY CONSUMPTION GROWTH*****

* import consumption growht
import delimited "Data/InputData/consumption_growth.csv", clear

* keep and rename relative columns
keep observationstatus obs_value time_period v28 institutionalsector sector ref_area
keep if sector=="S1M"
keep if v28=="Growth rate, period on period"
sort ref_area time_period
keep ref_area time_period obs_value

* rescale from percentage to real
replace obs_value = 0.01*obs_value

* reformat date to quarter
g year_str = substr(time_period, 1, 4)
g qtr_str = substr(time_period, -1, 1)
destring year_str, g(year)
destring qtr_str, g(qtr)
g date = yq(year, qtr)
format date %tq

* rename and filter columns
ren obs_value value
ren ref_area country
keep date country value

* reshape so that observations are quarters
reshape wide value, i(date) j(country, string)
ren valueUSA g_cons_us
ren valueKOR g_cons_kor

* save as intermediate
save "Data/IntermediateData/cons_growth.dta", replace

**************************************


*****QUARTERLY PRICE LEVEL*****

* import quarterly cpi and drop empty observation
import delimited "Data/InputData/cpi.csv", clear
drop if _n==203

* rescale from percentage to real
replace obs_value = 0.01*obs_value

* reformat date as quarter
g year_str = substr(time_period, 1, 4)
g qtr_str = substr(time_period, -1, 1)
destring year_str, g(year)
destring qtr_str, g(qtr)
g date = yq(year, qtr)
format date %tq

* rename and filter columns
ren obs_value value
ren countryid country
keep date country value

* reshape so that observations are quarters
reshape wide value, i(date) j(country, string)
ren valueUSA g_cpi_us
ren valueKOR g_cpi_kor

* save as intermediate
save "Data/IntermediateData/cpi_growth.dta", replace

*******************************


*****QUARTERLY EXCHANGE RATES*****

import delimited "Data/InputData/fx_quarterly.csv", clear

* reformat date as quarter
g year_str = substr(time_period, 1, 4)
g qtr_str = substr(time_period, -1, 1)
destring year_str, g(year)
destring qtr_str, g(qtr)
g date = yq(year, qtr)
format date %tq

* rename and filter columns
ren obs_value value
ren countryid country
keep date value
ren value E

* save as intermediate
save "Data/IntermediateData/fx_quarterly.dta", replace

**********************************


*****MONTHLY TO QUARTERLY DATA*****

* import monthly analysis data
use "Data/AnalysisData/analysis_monthly.dta", clear

* get the max and min values of each Ahn et al. shock by quarter
drop *max
g date = qofd(dofm(month))
format date %tq
bysort date: egen mp_shock_max = max(mp_shock)
bysort date: egen mp_shock_min = min(mp_shock)
bysort date: egen cbi_shock_max = max(cbi_shock)
bysort date: egen cbi_shock_min = min(cbi_shock)
bysort date: egen surprise_max = max(surprise)
bysort date: egen surprise_min = min(surprise)

* keep relevant columns and collapse to quarterly by most recent value
keep *_max *_min date bond_supply epu* ebp
sort date
collapse (last) epu* bond_supply *_max *_min ebp, by(date)

* save as intermediate
save "Data/IntermediateData/shocks_bonds_quarterly.dta", replace

****************************


*****MERGE AND CLEAN DATA*****

* merge quarterly data
use "Data/IntermediateData/cons_growth.dta", clear
merge 1:1 date using "Data/IntermediateData/cpi_growth.dta", nogen keep(1 3)
merge 1:1 date using "Data/IntermediateData/fx_quarterly.dta", nogen keep(1 3)
merge 1:1 date using "Data/IntermediateData/shocks_bonds_quarterly.dta", nogen keep(1 3)

* set time to quarters
tsset date, quarterly

* calculate change in log real exchange rate
g e = log(E)
g d_q = f1.g_cpi_us + (f1.e - e) - f1.g_cpi_kor

* calculate change in Backus-Smith residual
g d_z = 2 * (f1.g_cons_kor - f1.g_cons_us) - d_q

* for each Ahn et al. shock, calcuate the maximum magnitude shock over the past year
g abs1 = surprise_max
g abs2 = surprise_min
g max = max(abs1, l.abs1, l2.abs1, l3.abs1)
g min = min(abs2, l.abs2, l2.abs2, l3.abs2)
g surprise_4max = max
replace surprise_4max = min if abs(min) > abs(max)
drop abs* max min
g abs1 = mp_shock_max
g abs2 = mp_shock_min
g max = max(abs1, l.abs1, l2.abs1, l3.abs1)
g min = min(abs2, l.abs2, l2.abs2, l3.abs2)
g mp_shock_4max = max
replace mp_shock_4max = min if abs(min) > abs(max)
drop abs* max min
g abs1 = cbi_shock_max
g abs2 = cbi_shock_min
g max = max(abs1, l.abs1, l2.abs1, l3.abs1)
g min = min(abs2, l.abs2, l2.abs2, l3.abs2)
g cbi_shock_4max = max
replace cbi_shock_4max = min if abs(min) > abs(max)
drop abs* max min

* save as analysis
save "Data/AnalysisData/analysis_quarterly.dta", replace

******************************
